The 640 MEG Shareware Studio 2

home *** CD-ROM | disk | FTP | other *** search

/ The 640 MEG Shareware Studio 2 / The 640 Meg Shareware Studio CD-ROM Volume II (Data Express)(1993).ISO / clang / nn.zip / DIGEST.C < prev next >

Wrap

C/C++ Source or Header | 1989-12-31 | 10KB | 425 lines

/*
 *	digest article handling
 *
 *	Recognizes digest articles and splits them into their sub-articles
 *	by locating embedded headers in the digest body.  File positions and
 *	parsed header fields are stored in the global 'digest' structure.
 */

#include "config.h"
#include "news.h"
#include "match.h"
#include "debug.h"

/*
 * TEST prints a debug trace when DG_TEST is set in Debug.
 * It is wrapped in do/while(0) so that it always behaves as a single
 * statement, also when used as the body of an if/else.
 */

#ifdef DG_TEST
#define TEST(fmt, x, y) \
    do { if (Debug & DG_TEST) printf(fmt, x, y); } while (0)
#else
#define TEST(fmt, x, y)
#endif

/*
 * Forward declarations.
 *
 * dg_hdr_field is defined 'static' at the end of this file; it must be
 * declared 'static' at file scope before its first use, otherwise the
 * identifier would get both external and internal linkage.
 */

int parse_digest_header();
int skip_digest_body();
static char **dg_hdr_field();

/*
 * Character class table used with the MATCH_DROP / MATCH_EQ macros
 * from match.h when comparing the subject with lines in the body.
 *
 * Characters mapped to 00 have no class of their own; upper and lower
 * case letters share the same class (11..36), and the characters
 * following 'Z' and 'z' ([ \ ] and { | }) share the classes 37..39.
 */

static char match_digest[128] = {

/*  NUL SOH STX ETX EOT ENQ ACK BEL BS  TAB NL  VT  FF  CR  SO  SI  */
    00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,

/*  DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM  SUB ESC FS  GS  RS  US  */
    00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,

/*  SP  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   */
    00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,

/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 00, 00, 00, 00, 00, 00,

/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   */
    00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,

/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   */
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00,

/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   */
    00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,

/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   DEL */
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00
};

static char digest_pattern[] = "digest";

/*
 * Prepare digest_pattern for use with quick_match().
 * Must be called before is_digest().
 */

int
init_digest_parsing()
{
    init_quick_match(digest_pattern);
}

/*
 * test if global 'news' header is header of a digest.
 * body points to a buffer (NUL term)
 * containing the first part of the article.
 *
 * Returns 1 if the article is considered a digest, 0 otherwise.
 *
 * The article is a digest when the subject contains digest_pattern
 * (as determined by quick_match) and one of the first 10 lines of the
 * body matches the part of the subject preceding the position returned
 * by quick_match.  The comparison is done through match_digest, i.e.
 * characters for which MATCH_DROP is true are skipped on both sides.
 */

int
is_digest(body)
register char *body;
{
    char *dpos, *quick_match();
    register char *sp;
    register int l;

    /* articles without a subject line are not digests (per definition) */
    if (news.ng_subj == NULL) return 0;

    if ((dpos = quick_match(news.ng_subj, digest_pattern)) != NULL) {
        int lgt = dpos - news.ng_subj;  /* length of subject prefix to match */
        int maxl = 10;                  /* max number of body lines examined */

        /* look for a line matching the subject */
        while (*body && maxl) {
            sp = news.ng_subj;
            l = lgt;

            /* fast path: line starts with an exact copy of the prefix */
            if (*body == *sp && strncmp(body, sp, l) == 0)
                goto ok;

            /* slow path: compare char by char through match_digest */
            while (*body && *body != NL) {
                /* skip (but count) droppable characters in the subject */
                while (*sp && MATCH_DROP(match_digest, *sp)) {
                    if (--l == 0) goto ok;
                    ++sp;
                }
                /* skip droppable characters in the body line */
                if (MATCH_DROP(match_digest, *body)) {
                    ++body;
                    continue;
                }
                if (*sp && MATCH_EQ(match_digest, *body, *sp)) {
                    if (--l == 0) goto ok;
                    ++sp;
                }
                ++body;
            }

            /* step past the newline; only complete lines are counted */
            if (*body) ++body, --maxl;
        }
    }

    return 0;

ok:
    TEST("is_digest: %s\n", news.ng_subj, 0);
    return 1;
}

/*
 * Set by dg_hdr_field when an MMDF separator line (four ^A characters)
 * is seen where a header was expected; cleared by skip_digest_body when
 * the closing separator or end of file is reached.
 */

static int is_mmdf_folder = 0;

/*
 * expect that f is positioned at header of an article
 *
 * Parses the header of the next sub-article and skips its body.
 * Sub-articles with an empty body are skipped (skip_digest_body
 * returns a negative value for those).
 *
 * Sets digest.dg_hpos (file position on entry) and digest.dg_fpos
 * (file position after the header has been parsed).
 *
 * Returns -1 if no header could be parsed (neither a From nor a
 * Subject field was found), otherwise the non-negative result of
 * skip_digest_body: 1 if another article follows, 0 if not.
 */

int
get_digest_article(f, hdrbuf)
FILE *f;
news_header_buffer hdrbuf;
{
    int cont;

    digest.dg_hpos = ftell(f);
    TEST("GET DIGEST hp=%ld\n", digest.dg_hpos, 0);

    do {
        if (!parse_digest_header(f, 0, hdrbuf)) return -1;
        digest.dg_fpos = ftell(f);
        TEST("END HEADER hp=%ld fp=%ld\n", digest.dg_hpos, digest.dg_fpos);
    } while ((cont = skip_digest_body(f)) < 0);

    TEST("END BODY lp=%ld next=%ld\n", digest.dg_lpos, ftell(f));

    return cont;
}

#define BACKUP_LINES     50     /* remember class + offset for parsed lines */

#define LN_BLANK        0x01    /* blank line */
#define LN_DASHED       0x02    /* dash line */
#define LN_HEADER       0x04    /* (possible) header line */
#define LN_ASTERISK     0x08    /* asterisk line (near end) */
#define LN_END_OF       0x10    /* End of ... line */
#define LN_TEXT         0x20    /* unclassified line */

/*
 * skip until 'Subject: ' (or End of digest) line is found
 * then backup till start of header
 */

/*
 * Tuning parameters:
 *
 *	MIN_HEADER_LINES:	number of known header lines that must
 *				be found in a block to identify a new
 *				header
 *
 *	MAX_BLANKS_DASH		max no of blanks on a 'dash line'
 *
 *	MIN_DASHES		a 'dash line' must start with MORE than
 *				this number of dashes
 *
 *	MAX_BLANKS_ASTERISK	max no of blanks on an 'asterisk line'
 *
 *	MIN_ASTERISKS		an 'asterisk line' must start with MORE
 *				than this number of asterisks
 *
 *	MAX_BLANKS_END_OF	max no of blanks before "End of "
 */

#define MIN_HEADER_LINES        2
#define MAX_BLANKS_DASH         3
#define MIN_DASHES              16
#define MAX_BLANKS_ASTERISK     1
#define MIN_ASTERISKS           10
#define MAX_BLANKS_END_OF       1

/*
 * Skip the body of the current sub-article.
 *
 * Reads lines from f, classifying each one (LN_*) and remembering the
 * file offset and class of the last BACKUP_LINES lines in a circular
 * buffer.  The body ends when
 *
 *   - MIN_HEADER_LINES consecutive lines starting in column 0 are
 *     recognized by dg_hdr_field: f is repositioned at the first line
 *     of that header block,
 *   - an MMDF separator line is read, or
 *   - end of file is reached.
 *
 * In all cases digest.dg_lpos is backed up over trailing blank and
 * dashed lines (and at end of file also over asterisk and "End of"
 * lines), and digest.dg_lines is adjusted accordingly.
 *
 * Returns
 *	 1	a new article header follows
 *	 0	end of file, no article follows
 *	-1	the skipped article was empty
 */

int
skip_digest_body(f)
register FILE *f;
{
    off_t backup_p[BACKUP_LINES];       /* file offset of remembered lines */
    int line_type[BACKUP_LINES];        /* LN_* class of remembered lines */
    register int backup_index, backup_count;
    int more_header_lines, end_or_asterisks, blanks;
    char line[1024];
    register char *cp;

/* step one entry backwards in the circular backup buffer */
#define decrease_index() \
    do { \
        if (--backup_index < 0) backup_index = BACKUP_LINES - 1; \
    } while (0)

    backup_index = -1;
    backup_count = 0;
    end_or_asterisks = 0;
    digest.dg_lines = 0;

next_line:
    /* the current line broke any sequence of header lines */
    more_header_lines = 0;

next_possible_header_line:
    digest.dg_lines++;

    if (++backup_index == BACKUP_LINES) backup_index = 0;
    if (backup_count < BACKUP_LINES) backup_count++;

    backup_p[backup_index] = ftell(f);
    line_type[backup_index] = LN_TEXT;

    if (fgets(line, 1024, f) == NULL) {
        TEST("end_of_file, bc=%d, lines=%d\n", backup_count, digest.dg_lines);

        if (is_mmdf_folder) {
            /* missing closing separator: article extends to end of file */
            digest.dg_lpos = backup_p[backup_index];
            is_mmdf_folder = 0;
            return 0;
        }

        /* end of file => look for "****" or "End of" line */

        if (end_or_asterisks)
            while (--backup_count >= 0) {
                --digest.dg_lines;
                decrease_index();
                if (line_type[backup_index] & (LN_ASTERISK | LN_END_OF))
                    break;
            }

        if (digest.dg_lines == 0) return 0;

        /* back up over trailing separator and blank lines */
        while (--backup_count >= 0) {
            --digest.dg_lines;
            digest.dg_lpos = backup_p[backup_index];
            decrease_index();
            if ((line_type[backup_index] &
                 (LN_ASTERISK | LN_END_OF | LN_BLANK | LN_DASHED)) == 0)
                break;
        }

        return 0;       /* no article follows */
    }

    TEST("\n>>%-.50s ==>>", line, 0);

    /* MMDF separator line: four ^A characters */
    if (line[0] == '\001' && strcmp(line, "\001\001\001\001\n") == 0) {
        digest.dg_lpos = backup_p[backup_index];
        /* not inside an MMDF message: re-read the separator as a header */
        if (!is_mmdf_folder) fseek(f, digest.dg_lpos, 0);
        --digest.dg_lines;
        is_mmdf_folder = 0;
        return (digest.dg_lines <= 0) ? -1 : 1;
    }

    /* inside an MMDF message only the separator can end the body */
    if (is_mmdf_folder) goto next_line;

    /* skip leading white space */
    for (cp = line; *cp && isascii(*cp) && isspace(*cp); cp++)
        ;

    if (*cp == NUL) {
        TEST("BLANK", 0, 0);
        line_type[backup_index] = LN_BLANK;
        goto next_line;
    }

    blanks = cp - line;

    if (*cp == '-') {
        if (blanks > MAX_BLANKS_DASH) goto next_line;

        while (*cp == '-') cp++;
        /* note: strictly more than MIN_DASHES leading dashes required */
        if (cp - line - blanks > MIN_DASHES) {
            /* rest of line may only contain dashes and white space */
            while (*cp && (*cp == '-' || (isascii(*cp) && isspace(*cp))))
                cp++;
            if (*cp == NUL) {
                TEST("DASHED", 0, 0);
                line_type[backup_index] = LN_DASHED;
            }
        }
        goto next_line;
    }

    if (*cp == '*') {
        if (blanks > MAX_BLANKS_ASTERISK) goto next_line;

        while (*cp == '*') cp++;
        /* note: strictly more than MIN_ASTERISKS leading asterisks required */
        if (cp - line - blanks > MIN_ASTERISKS) {
            /* rest of line may only contain asterisks and white space */
            while (*cp && (*cp == '*' || (isascii(*cp) && isspace(*cp))))
                cp++;
            if (*cp == NUL) {
                TEST("ASTERISK", 0, 0);
                line_type[backup_index] = LN_ASTERISK;
                end_or_asterisks++;
            }
        }
        goto next_line;
    }

    if (blanks <= MAX_BLANKS_END_OF &&
        *cp == 'E' && strncmp(cp, "End of ", 7) == 0) {
        TEST("END_OF_", 0, 0);
        line_type[backup_index] = LN_END_OF;
        end_or_asterisks++;
        goto next_line;
    }

    /* only lines starting in column 0 can be header lines */
    if (blanks == 0) {
        if (dg_hdr_field(line, 0)) {
            TEST("HEADER", 0, 0);

            line_type[backup_index] = LN_HEADER;
            if (++more_header_lines < MIN_HEADER_LINES)
                goto next_possible_header_line;

            /* found block with MIN_HEADER_LINES */

            /* search for beginning of header */

            TEST("\nSearch for start of header\n", 0, 0);

            for (;;) {
                fseek(f, backup_p[backup_index], 0);
                --digest.dg_lines;
                if (--backup_count == 0) break;
                decrease_index();
                if ((line_type[backup_index] & (LN_HEADER | LN_TEXT)) == 0)
                    break;
            }

            if (digest.dg_lines == 0) {
                TEST("Skipped empty article\n", 0, 0);
                return -1;
            }

            /* back up over blank and dashed lines preceding the header */
            for (;;) {
                digest.dg_lpos = backup_p[backup_index];
                if (--backup_count < 0) break;
                decrease_index();
                if ((line_type[backup_index] & (LN_BLANK | LN_DASHED)) == 0)
                    break;
                --digest.dg_lines;
            }

            return (digest.dg_lines == 0) ? -1 : 1;
        }

        /*
         * unrecognized line in column 0 does not reset the count of
         * header lines seen so far
         */
        goto next_possible_header_line;
    }

    goto next_line;
}

/*
 * Parse the header of a digest sub-article at the current position of f.
 *
 * The date, from, subject, and to fields of 'digest' are cleared and
 * then filled in by parse_header() using dg_hdr_field to recognize the
 * header lines; 'all' is passed on to dg_hdr_field (see there).
 *
 * Returns non-zero if a From or a Subject field was found.
 */

int
parse_digest_header(f, all, hdrbuf)
FILE *f;
int all;
news_header_buffer hdrbuf;
{
    extern char *parse_header();

    digest.dg_date = digest.dg_from = digest.dg_subj = digest.dg_to = NULL;

    parse_header(f, dg_hdr_field, all, hdrbuf);

    return digest.dg_from || digest.dg_subj;
}

/*
 * Identify the header line pointed to by lp.
 *
 * Returns the address of the field in 'digest' corresponding to the
 * header line, or NULL if the line is not a recognized header line.
 *
 * The first letter of the field name is matched in either case, the
 * rest of the name must match exactly:
 *
 *	Date:  		dg_date
 *	From:  		dg_from
 *	Subject		dg_subj  (only "Subject" is compared)
 *	Title:  	dg_subj
 *	Re:  		dg_subj  (only if 'all' is non-zero)
 *	To:  		dg_to    (only if 'all' is non-zero)
 *
 * An MMDF separator line (four ^A characters and a newline) sets
 * is_mmdf_folder and advances digest.dg_hpos past the 5 byte separator.
 */

static char **
dg_hdr_field(lp, all)
register char *lp;
int all;
{

/* lp has been advanced past the first letter when check is used */
#define check(name, lgt, field) \
    if (strncmp(name, lp, lgt) == 0) { \
        TEST("MATCH: field ", 0, 0); \
        return &digest.field; \
    }

    TEST("\nPARSE[%.20s] ==>> ", lp, 0);

    switch (*lp++) {

    case '\001':
        if (!is_mmdf_folder && strncmp(lp, "\001\001\001\n", 4) == 0) {
            is_mmdf_folder = 1;
            digest.dg_hpos += 5;
            return NULL;
        }
        break;

    case 'D':
    case 'd':
        check("ate: ", 5, dg_date);
        break;

    case 'F':
    case 'f':
        check("rom: ", 5, dg_from);
        break;

    case 'R':
    case 'r':
        if (!all) break;
        check("e: ", 3, dg_subj);
        break;

    case 'S':
    case 's':
        check("ubject", 6, dg_subj);
        break;

    case 'T':
    case 't':
        check("itle: ", 6, dg_subj);
        if (!all) break;
        check("o: ", 3, dg_to);
        break;
    }

#undef check

    TEST("NOT MATCHED ", 0, 0);

    return NULL;
}